##### This script loads datasets on four contextual country-level time-varying variables.
##### Then, the script assigns the required contextual information to the respective ESS country-rounds.
##### At the end, the script merges this harmonized information with the individual-level ESS data.

# country_code_2d(): assign two-letter ISO country codes to an OECD data set.
#
# The three-letter ISO country code is read from the FIRST column of
# `OECD_data`; the corresponding two-letter code is written into its LAST
# column (callers in this script append an empty `cntry` column beforehand).
# Only countries that fielded at least one European Social Survey (ESS) round
# are mapped. Every other row - including rows whose code is missing - gets
# the empty string "", so that such rows can be filtered out with `!= ""`.
# Source of country codes: https://countrycode.org
#
# Args:
#   OECD_data: data frame whose first column holds three-letter ISO codes and
#              whose last column is the placeholder to be overwritten.
# Returns:
#   `OECD_data` with its last column replaced by a character vector of
#   two-letter codes ("" where no ESS country matches). Zero-row input is
#   returned with a zero-length character column.
country_code_2d <- function(OECD_data) {
  iso3_to_iso2 <- c(
    ALB = "AL", AUT = "AT", BEL = "BE", BGR = "BG", HRV = "HR", CYP = "CY",
    CZE = "CZ", DNK = "DK", EST = "EE", FIN = "FI", FRA = "FR", DEU = "DE",
    GRC = "GR", HUN = "HU", ISL = "IS", IRL = "IE", ISR = "IL", ITA = "IT",
    XKX = "XK", LVA = "LV", LTU = "LT", LUX = "LU", NLD = "NL", MNE = "ME",
    NOR = "NO", POL = "PL", PRT = "PT", SRB = "RS", ROU = "RO", RUS = "RU",
    SVK = "SK", SVN = "SI", ESP = "ES", SWE = "SE", CHE = "CH", TUR = "TR",
    UKR = "UA", GBR = "GB"
  )

  # as.character() so that a factor column is matched on its labels
  iso3 <- as.character(OECD_data[[1]])

  # Vectorised lookup: one match() call instead of a row-by-row if/else chain
  iso2 <- unname(iso3_to_iso2[match(iso3, names(iso3_to_iso2))])

  # Unknown or missing three-letter codes are marked with the empty string
  iso2[is.na(iso2)] <- ""

  OECD_data[[ncol(OECD_data)]] <- iso2
  OECD_data
}



##### Loading the list of ESS country-rounds with a specified KEY QUARTER (i.e. the quarter in which the majority of the data for a country and ESS round was fielded) and the corresponding KEY YEAR
# key_quartal column refers to the quarter of a year when (the majority of) the fieldwork took place in a particular country round 
# key_year column refers to the year when (the majority of) the fieldwork took place in a particular country round 
ESS_data_context <- read.csv("./FIELDWORK_PERIODS.csv", header = TRUE, sep = ";", dec = ".")



### 1.) LABOUR FORCE PARTICIPATION RATE
# Reading the labour force participation rate from OECD database
# The OECD data was downloaded to contain yearly data from 2000 to 2021
# Source: https://data.oecd.org/emp/labour-force-participation-rate.htm
OECD_LFPR_data <- read.csv("./DP_LIVE_11072022201648574.csv", header = TRUE, sep = ",", dec = ".")
dim(OECD_LFPR_data)

# Assigning the two-digit country code
# (country_code_2d() reads the first column and overwrites the last one,
#  hence the placeholder column is appended first)
OECD_LFPR_data$cntry <- rep(NA)
OECD_LFPR_data <- country_code_2d(OECD_LFPR_data)

# Subsetting only OECD countries that are part of the ESS
OECD_LFPR_data_ess <- OECD_LFPR_data[OECD_LFPR_data$cntry != "", ]
rm(OECD_LFPR_data)

table(OECD_LFPR_data_ess$INDICATOR)
table(OECD_LFPR_data_ess$FREQUENCY)
# 33 ESS countries available
length(table(OECD_LFPR_data_ess$cntry))


##### ANALYSIS OF MISSING DATA
# Croatia (HR) as the only country has only 20 data points - NOT A PROBLEM (Croatia is not part of our analyses)
tapply(OECD_LFPR_data_ess$TIME, OECD_LFPR_data_ess$cntry, length)
tapply(OECD_LFPR_data_ess$TIME, OECD_LFPR_data_ess$cntry, range)

### FILLING "ESS_data_context" table with LFPR data
# Creating an empty variable for LFPR during the country-year fieldwork year
ESS_data_context$LFPR_y0 <- rep(NA, nrow(ESS_data_context))
# Creating an empty variable for LFPR during the year previous to the country-year fieldwork year (i.e. t-1)
ESS_data_context$LFPR_ym1 <- rep(NA, nrow(ESS_data_context))

# Helper: index of the single OECD row for a given country and year.
# Returns NA unless EXACTLY one row matches (no match, several matches and
# missing keys all yield NA instead of an error).
lfpr_cntry <- as.character(OECD_LFPR_data_ess$cntry)
lfpr_time <- as.character(OECD_LFPR_data_ess$TIME)
lfpr_row <- function(cntry, year) {
  hits <- which(lfpr_cntry == cntry & lfpr_time == as.character(year))
  if (length(hits) == 1L) hits else NA_integer_
}

# The assignment of LFPR
# y0 is taken from the key year; ym1 is looked up explicitly as key year - 1
# for the SAME country (rather than from whichever row precedes the match),
# and is only filled when the key year itself was found.
ess_cntry <- as.character(ESS_data_context$cntry)
ess_year <- as.numeric(as.character(ESS_data_context$key_year))
for (i in seq_len(nrow(ESS_data_context))) {
  row_y0 <- lfpr_row(ess_cntry[i], ess_year[i])
  if (is.na(row_y0)) {
    next
  }
  ESS_data_context$LFPR_y0[i] <- OECD_LFPR_data_ess$Value[row_y0]

  row_ym1 <- lfpr_row(ess_cntry[i], ess_year[i] - 1)
  if (!is.na(row_ym1)) {
    ESS_data_context$LFPR_ym1[i] <- OECD_LFPR_data_ess$Value[row_ym1]
  }
}

# checking how many country-year combinations are missing (9)
sum(is.na(ESS_data_context$LFPR_y0))

# displaying which countries AND key quartals have missing contextual variables
# since none of these countries are part of our analyses, it is not a problem that these 9 country-years are missing
# NOTE(review): columns 3:4 are addressed by position - presumably the country
# and key-quarter columns of FIELDWORK_PERIODS.csv; confirm against the file.
ESS_data_context[which(is.na(ESS_data_context$LFPR_y0)), 3:4]



### 2.) PUBLIC SOCIAL EXPENDITURE (as % of GDP)
# Reading the yearly public social expenditure from OECD database
# The OECD data was downloaded to contain yearly data from 2000 to 2019 (i.e. data for 20 years)
# Source: https://data.oecd.org/socialexp/social-spending.htm
OECD_SOC_EXP_data <- read.csv("./DP_LIVE_14072021180102531.csv", header = TRUE, sep = ",", dec = ".")
dim(OECD_SOC_EXP_data)

# Assigning the two-digit country code
# (country_code_2d() reads the first column and overwrites the last one,
#  hence the placeholder column is appended first)
OECD_SOC_EXP_data$cntry <- rep(NA)
OECD_SOC_EXP_data <- country_code_2d(OECD_SOC_EXP_data)

# Subsetting only OECD countries that are part of the ESS
OECD_SOC_EXP_data_ess <- OECD_SOC_EXP_data[OECD_SOC_EXP_data$cntry != "", ]
rm(OECD_SOC_EXP_data)

table(OECD_SOC_EXP_data_ess$INDICATOR)
table(OECD_SOC_EXP_data_ess$FREQUENCY)
# 28 ESS countries available
length(table(OECD_SOC_EXP_data_ess$cntry))


##### ANALYSIS OF MISSING DATA
# Switzerland (CH) is missing the data for 2019 - NOT A PROBLEM (as ESS round 9 was gathered during 2018-Q4 in Switzerland)
tapply(OECD_SOC_EXP_data_ess$TIME, OECD_SOC_EXP_data_ess$cntry, length)
tapply(OECD_SOC_EXP_data_ess$TIME, OECD_SOC_EXP_data_ess$cntry, range)

### FILLING "ESS_data_context" table with SOCEXP data
# Creating an empty variable for SOCEXP during the country-year fieldwork year
ESS_data_context$SOCEXP_y0 <- rep(NA, nrow(ESS_data_context))
# Creating an empty variable for SOCEXP during the year previous to the country-year fieldwork year (i.e. t-1)
ESS_data_context$SOCEXP_ym1 <- rep(NA, nrow(ESS_data_context))

# Helper: index of the single OECD row for a given country and year.
# Returns NA unless EXACTLY one row matches (no match, several matches and
# missing keys all yield NA instead of an error).
socexp_cntry <- as.character(OECD_SOC_EXP_data_ess$cntry)
socexp_time <- as.character(OECD_SOC_EXP_data_ess$TIME)
socexp_row <- function(cntry, year) {
  hits <- which(socexp_cntry == cntry & socexp_time == as.character(year))
  if (length(hits) == 1L) hits else NA_integer_
}

# The assignment of SOCEXP
# y0 is taken from the key year; ym1 is looked up explicitly as key year - 1
# for the SAME country (rather than from whichever row precedes the match),
# and is only filled when the key year itself was found.
ess_cntry <- as.character(ESS_data_context$cntry)
ess_year <- as.numeric(as.character(ESS_data_context$key_year))
for (i in seq_len(nrow(ESS_data_context))) {
  row_y0 <- socexp_row(ess_cntry[i], ess_year[i])
  if (is.na(row_y0)) {
    next
  }
  ESS_data_context$SOCEXP_y0[i] <- OECD_SOC_EXP_data_ess$Value[row_y0]

  row_ym1 <- socexp_row(ess_cntry[i], ess_year[i] - 1)
  if (!is.na(row_ym1)) {
    ESS_data_context$SOCEXP_ym1[i] <- OECD_SOC_EXP_data_ess$Value[row_ym1]
  }
}

# checking how many country-year combinations are missing (29)
sum(is.na(ESS_data_context$SOCEXP_y0))

# displaying which countries AND key quartals have missing contextual variable
# Bulgaria (BG) and Cyprus (CY) have missing data
# NOTE(review): columns 3:4 are addressed by position - presumably the country
# and key-quarter columns of FIELDWORK_PERIODS.csv; confirm against the file.
ESS_data_context[which(is.na(ESS_data_context$SOCEXP_y0)), 3:4]

# Eurostat data for Bulgaria (BG) and Cyprus (CY): online data code = SPR_EXP_SUM
# Source: https://ec.europa.eu/eurostat/databrowser/view/SPR_EXP_SUM__custom_1146171/default/table?lang=en
# Percentage of gross domestic product (GDP),	Social protection benefits,	Annual
SOCEXP_BG_CY <- read.csv("./SPR_EXP_SUM__BG_CY_2000_2018.csv", header = TRUE, sep = ";", dec = ".")

# Country-rounds to be filled from the Eurostat table.
# source_row: row of SOCEXP_BG_CY that is used (1 for BG, 2 for CY);
#             NOTE(review): this relies on the row order of the CSV - confirm.
# key_year:   fieldwork year; values are read from columns "X<year>" (t) and
#             "X<year - 1>" (t-1).
#   BG: essround 3 = 2006, 4 = 2009, 5 = 2011, 6 = 2013, 9 = 2018
#   CY: essround 3 = 2006, 4 = 2008, 5 = 2011, 6 = 2012, 9 = 2018
eurostat_fill <- data.frame(
  cntry = rep(c("BG", "CY"), each = 5),
  source_row = rep(1:2, each = 5),
  key_year = c(2006, 2009, 2011, 2013, 2018,
               2006, 2008, 2011, 2012, 2018),
  stringsAsFactors = FALSE
)

for (k in seq_len(nrow(eurostat_fill))) {
  fill_year <- eurostat_fill$key_year[k]
  fill_row <- eurostat_fill$source_row[k]
  target <- which(ESS_data_context$cntry == eurostat_fill$cntry[k] &
                    ESS_data_context$key_year == fill_year)
  if (length(target) == 0L) {
    # Report the unexpected gap instead of aborting the whole script
    warning("No ESS country-round found for ", eurostat_fill$cntry[k], " ",
            fill_year, "; Eurostat value not assigned", call. = FALSE)
    next
  }
  ESS_data_context$SOCEXP_y0[target] <- SOCEXP_BG_CY[fill_row, paste0("X", fill_year)]
  ESS_data_context$SOCEXP_ym1[target] <- SOCEXP_BG_CY[fill_row, paste0("X", fill_year - 1)]
}

# checking how many country-year combinations are missing now: 19 (29 - 5 - 5 = 19)
sum(is.na(ESS_data_context$SOCEXP_y0))



### 3.) GROSS DOMESTIC PRODUCT
# Reading the gross domestic product from OECD database
# The OECD data was downloaded to contain yearly data from 2000 to 2021
# Source: https://data.oecd.org/gdp/gross-domestic-product-gdp.htm#indicator-chart
OECD_GDP_data <- read.csv("./DP_LIVE_12072022131229628.csv", header = TRUE, sep = ",", dec = ".")
dim(OECD_GDP_data)

# Assigning the two-digit country code
# (country_code_2d() reads the first column and overwrites the last one,
#  hence the placeholder column is appended first)
OECD_GDP_data$cntry <- rep(NA)
OECD_GDP_data <- country_code_2d(OECD_GDP_data)

# Subsetting only OECD countries that are part of the ESS
OECD_GDP_data_ess <- OECD_GDP_data[OECD_GDP_data$cntry != "", ]
rm(OECD_GDP_data)

table(OECD_GDP_data_ess$INDICATOR)
table(OECD_GDP_data_ess$FREQUENCY)
# 35 ESS countries available
length(table(OECD_GDP_data_ess$cntry))


##### ANALYSIS OF MISSING DATA
# Only three countries (AL, RS, RU) that are not part of our analyses have the time series ending in 2020
tapply(OECD_GDP_data_ess$TIME, OECD_GDP_data_ess$cntry, length)
tapply(OECD_GDP_data_ess$TIME, OECD_GDP_data_ess$cntry, range)

### FILLING "ESS_data_context" table with GDP data
# Creating an empty variable for GDP during the country-year fieldwork year
ESS_data_context$GDP_y0 <- rep(NA, nrow(ESS_data_context))
# Creating an empty variable for GDP during the year previous to the country-year fieldwork year (i.e. t-1)
ESS_data_context$GDP_ym1 <- rep(NA, nrow(ESS_data_context))

# Helper: index of the single OECD row for a given country and year.
# Returns NA unless EXACTLY one row matches (no match, several matches and
# missing keys all yield NA instead of an error).
gdp_cntry <- as.character(OECD_GDP_data_ess$cntry)
gdp_time <- as.character(OECD_GDP_data_ess$TIME)
gdp_row <- function(cntry, year) {
  hits <- which(gdp_cntry == cntry & gdp_time == as.character(year))
  if (length(hits) == 1L) hits else NA_integer_
}

# The assignment of GDP (simultaneously dividing the OECD value by 1000,
# i.e. expressing GDP in 1000s USD)
# y0 is taken from the key year; ym1 is looked up explicitly as key year - 1
# for the SAME country (rather than from whichever row precedes the match),
# and is only filled when the key year itself was found.
ess_cntry <- as.character(ESS_data_context$cntry)
ess_year <- as.numeric(as.character(ESS_data_context$key_year))
for (i in seq_len(nrow(ESS_data_context))) {
  row_y0 <- gdp_row(ess_cntry[i], ess_year[i])
  if (is.na(row_y0)) {
    next
  }
  ESS_data_context$GDP_y0[i] <- OECD_GDP_data_ess$Value[row_y0] / 1000

  row_ym1 <- gdp_row(ess_cntry[i], ess_year[i] - 1)
  if (!is.na(row_ym1)) {
    ESS_data_context$GDP_ym1[i] <- OECD_GDP_data_ess$Value[row_ym1] / 1000
  }
}

# checking how many country-year combinations are missing (7)
sum(is.na(ESS_data_context$GDP_y0))

# displaying which countries AND key quartals have missing GDP per capita
# since none of these countries are part of our analyses, it is not a problem that these 7 country-years are missing
# NOTE(review): columns 3:4 are addressed by position - presumably the country
# and key-quarter columns of FIELDWORK_PERIODS.csv; confirm against the file.
ESS_data_context[which(is.na(ESS_data_context$GDP_y0)), 3:4]



### 4.) INCOME INEQUALITY (Gini coefficient of disposable income)
# Source: SWIID database, Version 9.3: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/LM4OWF

load("./swiid9_3.rda")

class(swiid_summary)
dim(swiid_summary) ### 5910 by 12
colnames(swiid_summary)

# Assigning the two-digit ISO country code to a new swiid_summary$cntry column.
# The country name is read from the FIRST column of swiid_summary; names that
# do not belong to an ESS country (or are missing) get the empty string "".
# NOTE(review): Montenegro and Serbia are mapped in country_code_2d() but are
# not mapped here; confirm whether this omission is intended.
swiid_name_to_iso2 <- c(
  "Albania" = "AL", "Austria" = "AT", "Belgium" = "BE", "Bulgaria" = "BG",
  "Croatia" = "HR", "Cyprus" = "CY", "Czech Republic" = "CZ",
  "Denmark" = "DK", "Estonia" = "EE", "Finland" = "FI", "France" = "FR",
  "Germany" = "DE", "Greece" = "GR", "Hungary" = "HU", "Iceland" = "IS",
  "Ireland" = "IE", "Israel" = "IL", "Italy" = "IT", "Kosovo" = "XK",
  "Latvia" = "LV", "Lithuania" = "LT", "Luxembourg" = "LU",
  "Netherlands" = "NL", "Norway" = "NO", "Poland" = "PL", "Portugal" = "PT",
  "Romania" = "RO", "Russia" = "RU", "Slovakia" = "SK", "Slovenia" = "SI",
  "Spain" = "ES", "Sweden" = "SE", "Switzerland" = "CH", "Turkey" = "TR",
  "Ukraine" = "UA", "United Kingdom" = "GB"
)
# Vectorised lookup (one match() call instead of a row-by-row if/else chain)
swiid_cntry <- unname(
  swiid_name_to_iso2[match(as.character(swiid_summary[[1]]), names(swiid_name_to_iso2))]
)
swiid_cntry[is.na(swiid_cntry)] <- ""
swiid_summary$cntry <- swiid_cntry

# checking the assignment of country codes
table(swiid_summary$cntry)
length(table(swiid_summary$cntry))
table(swiid_summary$country, swiid_summary$cntry)

# Subsetting only those 36 countries that are part of ESS
swiid_ess <- swiid_summary[swiid_summary$cntry != "", ]
dim(swiid_ess)

##### ANALYSIS OF MISSING DATA
# Ideally, we need data covering period from 2000 to 2018-2019.
# This is the case for all analysed countries except for Iceland (IS).
# The missing values for Iceland 2019 will be imputed based on the latest available value (i.e. 2017)
tapply(swiid_ess$year, swiid_ess$cntry, range)
tapply(swiid_ess$year, swiid_ess$cntry, length)
tapply(swiid_ess$year, swiid_ess$cntry, min)
tapply(swiid_ess$year, swiid_ess$cntry, max)

### checking whether each ESS country has a yearly data; for each country: [(maximum-minimum)+1)/length]
((tapply(swiid_ess$year, swiid_ess$cntry, max)-tapply(swiid_ess$year, swiid_ess$cntry, min))+1)/tapply(swiid_ess$year, swiid_ess$cntry, length)


########## gini_disp: Estimate of Gini index of inequality in equivalized (square root scale) household DISPOSABLE (post-tax, post-transfer) income
# Creating an empty variable for Gini index during the country-year fieldwork year
ESS_data_context$gini_disp_y0 <- rep(NA, nrow(ESS_data_context))
# Creating an empty variable for Gini index during the year previous to the country-year fieldwork year (i.e. t-1)
ESS_data_context$gini_disp_ym1 <- rep(NA, nrow(ESS_data_context))

# Helper: index of the single SWIID row for a given country and year.
# Returns NA unless EXACTLY one row matches (no match, several matches and
# missing keys all yield NA instead of an error).
gini_cntry <- as.character(swiid_ess$cntry)
gini_year <- as.character(swiid_ess$year)
gini_row <- function(cntry, year) {
  hits <- which(gini_cntry == cntry & gini_year == as.character(year))
  if (length(hits) == 1L) hits else NA_integer_
}

### FILLING "ESS_data_context" table with SWIID gini_disp data
# y0 is taken from the key year; ym1 is looked up explicitly as key year - 1
# for the SAME country (rather than from whichever row precedes the match),
# and is only filled when the key year itself was found.
ess_cntry <- as.character(ESS_data_context$cntry)
ess_year <- as.numeric(as.character(ESS_data_context$key_year))
for (i in seq_len(nrow(ESS_data_context))) {
  row_y0 <- gini_row(ess_cntry[i], ess_year[i])
  if (is.na(row_y0)) {
    next
  }
  ESS_data_context$gini_disp_y0[i] <- swiid_ess$gini_disp[row_y0]

  row_ym1 <- gini_row(ess_cntry[i], ess_year[i] - 1)
  if (!is.na(row_ym1)) {
    ESS_data_context$gini_disp_ym1[i] <- swiid_ess$gini_disp[row_ym1]
  }
}

# checking how many country-year combinations are missing (3)
sum(is.na(ESS_data_context$gini_disp_y0))

# Iceland (IS) 2019 is missing
# Assigning the latest available Gini coefficient (i.e. from 2017) to Iceland 2019
# (the same 2017 value is used for both the fieldwork year and t-1)
iceland_2019 <- which(ESS_data_context$cntry == "IS" & ESS_data_context$key_year == 2019)
iceland_gini_2017 <- swiid_ess$gini_disp[which(swiid_ess$cntry == "IS" & swiid_ess$year == 2017)]
if (length(iceland_2019) > 0L && length(iceland_gini_2017) == 1L) {
  ESS_data_context$gini_disp_y0[iceland_2019] <- iceland_gini_2017
  ESS_data_context$gini_disp_ym1[iceland_2019] <- iceland_gini_2017
}

# displaying which countries AND key quartals have missing contextual variables (only two country-years that will not be part of the analysis are missing)
# NOTE(review): columns 3:4 are addressed by position - presumably the country
# and key-quarter columns of FIELDWORK_PERIODS.csv; confirm against the file.
ESS_data_context[which(is.na(ESS_data_context$gini_disp_y0)), 3:4]

# checking how many country-year combinations are missing (after the correction, there are only 2)
sum(is.na(ESS_data_context$gini_disp_y0))
# removing the swiid list
rm(swiid)


### deleting lagged contextual variables (i.e. contextual variables measured at time t-1)
# (since we are only interested in the effects of contextual variables at the time of the respective fieldwork period)
library(data.table)

# as.data.table() returns a new data.table, so ESS_data_context itself stays an
# untouched data.frame (a plain `<-` followed by setDT() would convert an
# object that is still shared with ESS_data_context by reference).
contextual_variables <- as.data.table(ESS_data_context)
contextual_variables[, c("LFPR_ym1", "SOCEXP_ym1", "GDP_ym1", "gini_disp_ym1") := NULL]



##### Merging the four contextual variables with the individual-level ess_4 data
# ess_4 is expected to exist already (it is not created in this part of the script);
# setDT() converts it to a data.table by reference.
setDT(ess_4)
dim(ess_4) # 34 variables
# Creating a new ess_context dataset, which combines individual-level data with contextual-level data
# (every row of ess_4 is kept; contextual columns are matched on country and ESS round)
ess_context <- contextual_variables[ess_4, on = .(cntry, essround)]
dim(ess_context) # 41 variables

# All four contextual variables are available for all country rounds - there are no missing values
table(is.na(ess_context$LFPR_y0))
table(is.na(ess_context$SOCEXP_y0))
table(is.na(ess_context$GDP_y0))
table(is.na(ess_context$gini_disp_y0))
